#This is to get;
#FigureA6a
#FigureA6b
#FigureA6c
#FigureA6d

rm(list = ls())

library("ggplot2")

setwd("/Users/bgpopescu/Dropbox/Legacies_Central_Europe/")
setwd("C:/Users/bogdanp/Dropbox/Legacies_Central_Europe/")

####################
#Step1: Census 1880#
####################

##########
#Read CSV#
##########
data_1880 <- read.csv(file = './data/data_ro_1880.csv')

###################
#Defining Distance#
###################

data_1880$dist1 <- as.numeric( with (data_1880,ifelse(data_1880$treat==1, 1, -1)))
data_1880$dist2 <-data_1880$dist1*data_1880$Ott_Habs_brd_distance

unique(data_1880$Ott_Habs_brd_NEAR_FID)
data_1880$bfe1<-ifelse(data_1880$Ott_Habs_brd_NEAR_FID==1, 1, 0)
data_1880$bfe2<-ifelse(data_1880$Ott_Habs_brd_NEAR_FID==9, 1, 0)
data_1880$bfe3<-ifelse(data_1880$Ott_Habs_brd_NEAR_FID==10, 1, 0)
data_1880$bfe4<-ifelse(data_1880$Ott_Habs_brd_NEAR_FID==11, 1, 0)
data_1880$bfe5<-ifelse(data_1880$Ott_Habs_brd_NEAR_FID==12, 1, 0)
data_1880$bfe6<-ifelse(data_1880$Ott_Habs_brd_NEAR_FID==13, 1, 0)
data_1880$bfe7<-ifelse(data_1880$Ott_Habs_brd_NEAR_FID==14, 1, 0)
data_1880$bfe8<-ifelse(data_1880$Ott_Habs_brd_NEAR_FID==15, 1, 0)


#############################
#PLACEBO1: pct_literate_1880#
#############################

#Subset data 
placebo <-data_1880[which(data_1880$dist2>-120 & data_1880$dist2<120),]
placebo$pct_literate
unique(placebo$treat)

#Define range of locations for placebo border
sequence=seq(from=-120, to=120, by=10)

#Create empty variables
p.value=rep(NA, length(sequence))
estimate=rep(NA, length(sequence))
confint1=rep(NA, length(sequence))
confint2=rep(NA, length(sequence))
#Loop over potential placebo locations of border
for(i in 1:length(sequence)){
  placebo$dist3=placebo$dist2+sequence[i]
  placebo$treat2=ifelse(placebo$dist3 > 0, placebo$treat <- 1, placebo$treat <- 0)
  #unique(placebo$treat2)
  model1<-lm(pct_literate ~ treat2 + POINT_X + POINT_Y+
                    Ott_Habs_brd_distance + bfe1 + bfe2 + bfe3 + 
                    bfe4 + bfe5 + bfe6 + bfe7, data = placebo)
  #model1<-lm(pct_no_water~treat2+dist3+treat2*dist3, data=placebo)
  confint1[i]=confint(model1)[2,1]
  confint2[i]=confint(model1)[2,2]
  estimate[i]=coef(model1)[2]
}

placebo_data <- data.frame(sequence, estimate, confint1, confint2)

annotations <- data.frame(
  xpos = c(-Inf,-Inf,Inf,Inf),
  ypos =  c(-Inf, Inf,-Inf,Inf),
  annotateText = c("","← Closer to South",
                   "","Closer to North →"),
  hjustvar = c(0,0,1,1) ,
  vjustvar = c(0,1,0,1)) #<- adjust


pct_literate_1880<-ggplot(data=placebo_data, aes(sequence, estimate))+
  geom_point(size=1.3)+
  geom_point(data = placebo_data[!(placebo_data$confint1 < 0 & placebo_data$confint2 >0)& abs(placebo_data$sequence)<60,],
             color= "red", size = 2)+ 
  geom_errorbar(aes(ymin=confint1, ymax=confint2), size=0.2, width = 0)+
  geom_errorbar(data = placebo_data[!(placebo_data$confint1 < 0 & placebo_data$confint2 >0)& abs(placebo_data$sequence)<60,],
                aes(ymin=confint1, ymax=confint2), size=0.2, width = 0, color= "red")+
  geom_hline(yintercept = 0)+
  geom_vline(xintercept = 0, color= "red")+
  theme_bw()+
  scale_x_continuous(name="Placebo border distance from actual border (km)", breaks = seq(-120, 120, 20)) +
  scale_y_continuous(name="Estimated effect")+
  ggtitle("Pct. Literate, 1880") +
  theme(axis.text.x = element_text(size=14),
        axis.text.y = element_text(size=14),
        axis.title=element_text(size=14),
        plot.title = element_text(hjust = 0.5),
        legend.position = c(1, 0),
        #Legend.position values should be between 0 and 1. c(0,0) corresponds to the "bottom left"
        #and c(1,1) corresponds to the "top right" position.
        legend.box.background = element_rect(fill='white'),
        legend.background = element_blank(),
        legend.text=element_text(size=12))+
  geom_text(data=annotations,aes(x=xpos,y=ypos,hjust=hjustvar,vjust=vjustvar,label=annotateText))+
  geom_text(x=18,y=Inf, hjust=1,vjust=1,label="Actual Border", color="Red")

pct_literate_1880

ggsave(pct_literate_1880, 
       file="./Paper/graphs/figureA6a.jpg", 
       height=10, width=20, 
       units = "cm", dpi=300)


####################
#Step2: Census 1910#
####################
##########
#Read CSV#
##########
data_1910 <- read.csv(file = './data/data_ro_1910.csv')

###################
#Defining Distance#
###################

data_1910$dist1 <- as.numeric( with (data_1910,ifelse(data_1910$treat==1, 1, -1)))
data_1910$dist2 <-data_1910$dist1*data_1910$Ott_Habs_brd_distance

data_1910$bfe1<-ifelse(data_1910$Ott_Habs_brd_NEAR_FID==1, 1, 0)
data_1910$bfe2<-ifelse(data_1910$Ott_Habs_brd_NEAR_FID==9, 1, 0)
data_1910$bfe3<-ifelse(data_1910$Ott_Habs_brd_NEAR_FID==10, 1, 0)
data_1910$bfe4<-ifelse(data_1910$Ott_Habs_brd_NEAR_FID==11, 1, 0)
data_1910$bfe5<-ifelse(data_1910$Ott_Habs_brd_NEAR_FID==12, 1, 0)
data_1910$bfe6<-ifelse(data_1910$Ott_Habs_brd_NEAR_FID==13, 1, 0)
data_1910$bfe7<-ifelse(data_1910$Ott_Habs_brd_NEAR_FID==14, 1, 0)
data_1910$bfe8<-ifelse(data_1910$Ott_Habs_brd_NEAR_FID==15, 1, 0)


#############################
#PLACEBO: pct_litterate 1910#
#############################

#Subset data 
placebo <-data_1910[which(data_1910$dist2>-120 & data_1910$dist2<120),]
placebo$pct_literate
unique(placebo$treat)

#Define range of locations for placebo border
sequence=seq(from=-120, to=120, by=10)

#Create empty variables
p.value=rep(NA, length(sequence))
estimate=rep(NA, length(sequence))
confint1=rep(NA, length(sequence))
confint2=rep(NA, length(sequence))
#Loop over potential placebo locations of border
for(i in 1:length(sequence)){
  placebo$dist3=placebo$dist2+sequence[i]
  placebo$treat2=ifelse(placebo$dist3 > 0, placebo$treat <- 1, placebo$treat <- 0)
  #unique(placebo$treat2)
  model1<-lm(pct_literate ~ treat2 + POINT_X + POINT_Y+
               Ott_Habs_brd_distance + bfe1 + bfe2 + bfe3 + 
               bfe4 + bfe5 + bfe6 + bfe7, data = placebo)
  #model1<-lm(pct_no_water~treat2+dist3+treat2*dist3, data=placebo)
  confint1[i]=confint(model1)[2,1]
  confint2[i]=confint(model1)[2,2]
  estimate[i]=coef(model1)[2]
}

placebo_data <- data.frame(sequence, estimate, confint1, confint2)

annotations <- data.frame(
  xpos = c(-Inf,-Inf,Inf,Inf),
  ypos =  c(-Inf, Inf,-Inf,Inf),
  annotateText = c("","← Closer to South",
                   "","Closer to North →"),
  hjustvar = c(0,0,1,1) ,
  vjustvar = c(0,1,0,1)) #<- adjust

abs(placebo_data$sequence)<20
placebo_data$confint2

pct_literate_1910<-ggplot(data=placebo_data, aes(sequence, estimate))+
  geom_point(size=1.3)+
  geom_point(data = placebo_data[!(placebo_data$confint1 < 0 & placebo_data$confint2 >0)& abs(placebo_data$sequence)<60,],
             color= "red", size = 2)+ 
  geom_errorbar(aes(ymin=confint1, ymax=confint2), size=0.2, width = 0)+
  geom_errorbar(data = placebo_data[!(placebo_data$confint1 < 0 & placebo_data$confint2 >0)& abs(placebo_data$sequence)<60,],
                aes(ymin=confint1, ymax=confint2), size=0.2, width = 0, color= "red")+
  geom_hline(yintercept = 0)+
  geom_vline(xintercept = 0, color= "red")+
  theme_bw()+
  scale_x_continuous(name="Placebo border distance from actual border (km)", breaks = seq(-120, 120, 20)) +
  scale_y_continuous(name="Estimated effect")+
  ggtitle("Pct. Literate, 1910") +
  theme(axis.text.x = element_text(size=14),
        axis.text.y = element_text(size=14),
        axis.title=element_text(size=14),
        plot.title = element_text(hjust = 0.5),
        legend.position = c(1, 0),
        #Legend.position values should be between 0 and 1. c(0,0) corresponds to the "bottom left"
        #and c(1,1) corresponds to the "top right" position.
        legend.box.background = element_rect(fill='white'),
        legend.background = element_blank(),
        legend.text=element_text(size=12))+
  geom_text(data=annotations,aes(x=xpos,y=ypos,hjust=hjustvar,vjust=vjustvar,label=annotateText))+
  geom_text(x=18,y=Inf, hjust=1,vjust=1,label="Actual Border", color="Red")

pct_literate_1910

ggsave(pct_literate_1910, 
       file="./Paper/graphs/figureA6b.jpg", 
       height=10, width=20, 
       units = "cm", dpi=300)

####################
#Step3: Census 1930#
####################

##########
#Read CSV#
##########
data_1930 <- read.csv(file = './data/data_ro_1930.csv')

###################
#Defining Distance#
###################
data_1930$dist1 <- as.numeric( with (data_1930,ifelse(data_1930$treat==1, 1, -1)))
data_1930$dist2 <-data_1930$dist1*data_1930$Ott_Habs_brd_distance

data_1930$bfe1<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==1, 1, 0)
data_1930$bfe2<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==9, 1, 0)
data_1930$bfe3<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==10, 1, 0)
data_1930$bfe4<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==11, 1, 0)
data_1930$bfe5<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==12, 1, 0)
data_1930$bfe6<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==13, 1, 0)
data_1930$bfe7<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==14, 1, 0)
data_1930$bfe8<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==15, 1, 0)


############################
#PLACEBO: pct_literate_1930#
############################
#Subset data 
placebo <-data_1930[which(data_1930$dist2>-120 & data_1930$dist2<120),]
placebo$pct_literate
unique(placebo$treat)
placebo$pct_literate

#Define range of locations for placebo border
sequence=seq(from=-120, to=120, by=10)

#Create empty variables
p.value=rep(NA, length(sequence))
estimate=rep(NA, length(sequence))
confint1=rep(NA, length(sequence))
confint2=rep(NA, length(sequence))
#Loop over potential placebo locations of border
for(i in 1:length(sequence)){
  placebo$dist3=placebo$dist2+sequence[i]
  placebo$treat2=ifelse(placebo$dist3 > 0, placebo$treat <- 1, placebo$treat <- 0)
  #unique(placebo$treat2)
  model1<-lm(pct_literate ~ treat2 + POINT_X + POINT_Y+
               Ott_Habs_brd_distance + bfe1 + bfe2 + bfe3 + 
               bfe4 + bfe5 + bfe6 + bfe7, data = placebo)
  #model1<-lm(pct_no_water~treat2+dist3+treat2*dist3, data=placebo)
  confint1[i]=confint(model1)[2,1]
  confint2[i]=confint(model1)[2,2]
  estimate[i]=coef(model1)[2]
}

summary(lm(pct_literate ~ treat2, data=placebo))
unique(placebo$treat2)


placebo_data <- data.frame(sequence, estimate, confint1, confint2)

annotations <- data.frame(
  xpos = c(-Inf,-Inf,Inf,Inf),
  ypos =  c(-Inf, Inf,-Inf,Inf),
  annotateText = c("","← Closer to South",
                   "","Closer to North →"),
  hjustvar = c(0,0,1,1) ,
  vjustvar = c(0,1,0,1)) #<- adjust

abs(placebo_data$sequence)<20
placebo_data$confint2

pct_literate_1930<-ggplot(data=placebo_data, aes(sequence, estimate))+
  geom_point(size=1.3)+
  geom_point(data = placebo_data[!(placebo_data$confint1 < 0 & placebo_data$confint2 >0)& abs(placebo_data$sequence)<60,],
             color= "red", size = 2)+ 
  geom_errorbar(aes(ymin=confint1, ymax=confint2), size=0.2, width = 0)+
  geom_errorbar(data = placebo_data[!(placebo_data$confint1 < 0 & placebo_data$confint2 >0)& abs(placebo_data$sequence)<60,],
                aes(ymin=confint1, ymax=confint2), size=0.2, width = 0, color= "red")+
  geom_hline(yintercept = 0)+
  geom_vline(xintercept = 0, color= "red")+
  theme_bw()+
  scale_x_continuous(name="Placebo border distance from actual border (km)", breaks = seq(-120, 120, 20)) +
  scale_y_continuous(name="Estimated effect")+
  ggtitle("Pct. Literate, 1930") +
  theme(axis.text.x = element_text(size=14),
        axis.text.y = element_text(size=14),
        axis.title=element_text(size=14),
        plot.title = element_text(hjust = 0.5),
        legend.position = c(1, 0),
        #Legend.position values should be between 0 and 1. c(0,0) corresponds to the "bottom left"
        #and c(1,1) corresponds to the "top right" position.
        legend.box.background = element_rect(fill='white'),
        legend.background = element_blank(),
        legend.text=element_text(size=12))+
  geom_text(data=annotations,aes(x=xpos,y=ypos,hjust=hjustvar,vjust=vjustvar,label=annotateText))+
  geom_text(x=18,y=Inf, hjust=1,vjust=1,label="Actual Border", color="Red")

pct_literate_1930

ggsave(pct_literate_1930, 
       file="./Paper/graphs/figureA6c.jpg", 
       height=10, width=20, 
       units = "cm", dpi=300)


####################
#Step4: Census 1948#
####################
##########
#Read CSV#
##########
data_1948 <- read.csv(file = './data/data_ro_1948.csv')

###################
#Defining Distance#
###################
data_1948$dist1 <- as.numeric( with (data_1948,ifelse(data_1948$treat==1, 1, -1)))
data_1948$dist2 <-data_1948$dist1*data_1948$Ott_Habs_brd_distance

data_1948$bfe1<-ifelse(data_1948$Ott_Habs_brd_NEAR_FID==1, 1, 0)
data_1948$bfe2<-ifelse(data_1948$Ott_Habs_brd_NEAR_FID==2, 1, 0)
data_1948$bfe3<-ifelse(data_1948$Ott_Habs_brd_NEAR_FID==3, 1, 0)
data_1948$bfe4<-ifelse(data_1948$Ott_Habs_brd_NEAR_FID==4, 1, 0)
data_1948$bfe5<-ifelse(data_1948$Ott_Habs_brd_NEAR_FID==5, 1, 0)
data_1948$bfe6<-ifelse(data_1948$Ott_Habs_brd_NEAR_FID==6, 1, 0)
data_1948$bfe7<-ifelse(data_1948$Ott_Habs_brd_NEAR_FID==7, 1, 0)
data_1948$bfe8<-ifelse(data_1948$Ott_Habs_brd_NEAR_FID==8, 1, 0)

###########################
#PLACEBO: pct_no_water#
##########################

#Subset data 
placebo <-data_1948[which(data_1948$dist2>-120 & data_1948$dist2<120),]
placebo$pct_literate
unique(placebo$treat)
placebo$pct_literate


#Define range of locations for placebo border
sequence=seq(from=-120, to=120, by=10)

#Create empty variables
p.value=rep(NA, length(sequence))
estimate=rep(NA, length(sequence))
confint1=rep(NA, length(sequence))
confint2=rep(NA, length(sequence))
#Loop over potential placebo locations of border
for(i in 1:length(sequence)){
  placebo$dist3=placebo$dist2+sequence[i]
  placebo$treat2=ifelse(placebo$dist3 > 0, placebo$treat <- 1, placebo$treat <- 0)
  #unique(placebo$treat2)
  model1<-lm(pct_literate ~ treat2 + POINT_X + POINT_Y+
               Ott_Habs_brd_distance + bfe1 + bfe2 + bfe3 + 
               bfe4 + bfe5 + bfe6 + bfe7, data = placebo)
  #model1<-lm(pct_no_water~treat2+dist3+treat2*dist3, data=placebo)
  confint1[i]=confint(model1)[2,1]
  confint2[i]=confint(model1)[2,2]
  estimate[i]=coef(model1)[2]
}


placebo_data <- data.frame(sequence, estimate, confint1, confint2)

annotations <- data.frame(
  xpos = c(-Inf,-Inf,Inf,Inf),
  ypos =  c(-Inf, Inf,-Inf,Inf),
  annotateText = c("","← Closer to South",
                   "","Closer to North →"),
  hjustvar = c(0,0,1,1) ,
  vjustvar = c(0,1,0,1)) #<- adjust

pct_literate_1948<-ggplot(data=placebo_data, aes(sequence, estimate))+
  geom_point(size=1.3)+
  geom_point(data = placebo_data[!(placebo_data$confint1 < 0 & placebo_data$confint2 >0)& abs(placebo_data$sequence)<60,],
             color= "red", size = 2)+ 
  geom_errorbar(aes(ymin=confint1, ymax=confint2), size=0.2, width = 0)+
  geom_errorbar(data = placebo_data[!(placebo_data$confint1 < 0 & placebo_data$confint2 >0)& abs(placebo_data$sequence)<60,],
                aes(ymin=confint1, ymax=confint2), size=0.2, width = 0, color= "red")+
  geom_hline(yintercept = 0)+
  geom_vline(xintercept = 0, color= "red")+
  theme_bw()+
  scale_x_continuous(name="Placebo border distance from actual border (km)", breaks = seq(-120, 120, 20)) +
  scale_y_continuous(name="Estimated effect")+
  ggtitle("Pct. Literate, 1948") +
  theme(axis.text.x = element_text(size=14),
        axis.text.y = element_text(size=14),
        axis.title=element_text(size=14),
        plot.title = element_text(hjust = 0.5),
        legend.position = c(1, 0),
        #Legend.position values should be between 0 and 1. c(0,0) corresponds to the "bottom left"
        #and c(1,1) corresponds to the "top right" position.
        legend.box.background = element_rect(fill='white'),
        legend.background = element_blank(),
        legend.text=element_text(size=12))+
  geom_text(data=annotations,aes(x=xpos,y=ypos,hjust=hjustvar,vjust=vjustvar,label=annotateText))+
  geom_text(x=18,y=Inf, hjust=1,vjust=1,label="Actual Border", color="Red")

pct_literate_1948

ggsave(pct_literate_1948, 
       file="./Paper/graphs/figureA6d.jpg", 
       height=10, width=20, 
       units = "cm", dpi=300)
